*! version 5.0
* 13 August 2018
* NIDS
* Master Income do file updated for NIDS Wave 4

* THIS IS THE 4TH INCOME DO FILE - PREPARING VARIABLES FOR IMPUTATION: 4 OF 7
* THIS DO FILE PREPARES THE RELEVANT INCOME VARIABLES FOR IMPUTATION

*=====================================================================================================================================
* GLOBALS FOR DATA FILES, DO FILES AND VERSION SUFFIXES

* DEFINED IN "Master Income do file (1 of 7)" 

version 12.1

*=====================================================================================================================================

* USING THE ADULT AND INDDERIVED FILES TO CREATE A BIOCHILDREN VARIABLE
* For every adult with a birth history, count the children listed there whose
* best_age_yrs (from the individual derived file) lies between 0 and 18 inclusive.
* The result is one row per adult: pid, w4_hhid, biochildren, issue_d.


* Reshape the adult birth-history variables to long format: one row per adult and child slot

use "$DataIN\Adult_$VersionIN.dta", clear

rename w4_a_* *

keep pid w4_hhid bhres* bhchild_id*
reshape long bhchild_id bhres, i(pid) j(child)

* Flag adults who list a child with bhres==1 but no child id.
* The flag is built at adult level BEFORE the rows without a child id are dropped below;
* a row-level flag would be 0 on every row that survives the drop.
gen issue=1 if bhres==1 & bhchild_id==.
egen issue_d=max(issue==1), by(pid)
label variable issue_d "resisdent children with no id"
rename pid m_pid
rename bhchild_id pid

* Merging with individual derived file (pid is now the child's id, m_pid the adult's)
drop if pid == .
duplicates drop pid, force   //TEMP FIX. MICHELLE TO DROP ONCE BH SECTION IS FIXED

merge 1:1 pid using "$DataIN\indderived_$VersionIN.dta"
drop if _merge == 2
drop _merge

* Strip the wave prefix from all variables, then restore it on the household id
rename w4_* *
rename hhid w4_hhid

* Child aged 0-18; a missing age gives 0
gen hhchild=inrange(best_age_yrs, 0, 18)

gen biochild=0
replace biochild=1 if m_pid!=. & hhchild==1
egen biochildren = sum(biochild), by(m_pid)

* Collapse to one row per adult, keyed on pid again
keep m_pid w4_hhid biochildren issue_d
rename m_pid pid

duplicates drop pid, force


*------------------------------------------------------------------------------------------------------------------------------------


* MERGING WITH THE DATASET CREATED IN THE DO FILE "Income - Merging datasets to create income variables (2 of 7)"
merge 1:1 pid using "$DataOUT\data.dta", nogenerate

* Attach the household agriculture file; rows found only in that file are not kept
merge m:1 w4_hhid using "$DataOUT\hhagric.dta", keep(master match) nogenerate

*-------------------------------------------------------------------------------------------------------------------------------------

* RESPONSE STATUS
* response is a 0/1 dummy: 1 when outcome==1, 0 otherwise (including missing outcome).
* It was previously 1/missing, so the "if response==0" conditions used throughout this
* do file never matched and the 0 "Non-response" category below was never populated.
* NOTE(review): confirm that no later do file relies on response being missing.
gen response=outcome==1
label variable response "Did individual respond or not (incl. proxies)"
label values response dummy

*Variable that indicates the outcome of attempted sampling of this unit
gen responseoutcome=response
replace responseoutcome=2 if proxy==1
label define responseoutcome 0 "Non-response" 1 "Adult" 2 "Proxy"
label values responseoutcome responseoutcome 
*===============================================================================================================================

* DEMOGRAPHICS

* Gender
********
* male: 1 = male, 0 = female, taken from best_gen and filled from roster_male where missing
recode best_gen (1=1 "Male") (2=0 "Female"), generate(male)
replace male = roster_male if male == .
label variable male "Male dummy"
generate male_d = (male != .)

* Race
******
* Codes of 0 or below become missing; one 0/1 indicator is then built per race code
rename best_race race
recode race (min/0 = .)
generate race_d = (race != .)
recode race (1=1 "African") (2/4=0 "Non-African"), generate(african)
recode race (2=1 "Coloured") (3/4=0 "Non-Coloured") (1=0 "Non-Coloured"), generate(coloured)
recode race (3=1 "Asian_Indian") (1/2=0 "Non-asian_indian") (4=0 "Non-asian_indian"), generate(asian_indian)
recode race (4=1 "White") (1/3=0 "Non-White"), generate(white)
recode race (5=1 "Other") (1/4=0 "Non-Other"), generate(other)

* Age
******
* Negative age codes become missing
recode best_age_yrs (min/-1=.)
* age_d = 1 when age was missing and is zero-filled on the next line.
* It is set BEFORE the fill so that genuine age-0 children are not flagged as well.
gen age_d=best_age_yrs==.
replace best_age_yrs=0 if best_age_yrs==.
gen agesq=best_age_yrs^2
label variable agesq "Age squared at interview"
gen agecu=best_age_yrs^3
label variable agecu "Age cubed at interview"

* Schooling
************
* Schooling level from the education code: codes -9 to 0 and 25 map to 0, 1-12 to the grade,
* 13/14/15 to 10/11/12, 16-24 to 12, and 26 or above to missing
recode best_edu (-9/0=0) (1=1 "Grade 1") (2=2 "Grade 2") (3=3 "Grade 3") (4=4 "Grade 4") (5=5 "Grade 5") ///
(6=6 "Grade 6") (7=7 "Grade 7") (8=8 "Grade 8") (9=9 "Grade 9") (10=10 "Grade 10") (11=11 "Grade 11") ///
(12=12 "Grade 12") (13=10) (14=11) (15=12) (25=0 "No schooling") (16/24=12) (26/max=.), generate(schooling)
label variable schooling "Schooling"
generate schooling_d = (schooling != .)
generate schoolingsq = schooling^2

* Education dummies
* postschool keeps the original code for every value not recoded to 0 here
recode best_edu (-9/0=0) (0/12=0) (12/15=0) (25=0), generate(postschool)
replace postschool = 0 if best_edu >= -9 & best_edu < 0 & schooling < 12
generate postschool_d = (postschool != .)

* One indicator per distinct postschool value; indicators 2-10 get descriptive names.
* NOTE(review): the renames assume tabulate produces the categories in exactly this order
* (at least ten distinct values) - confirm against the data.
tabulate postschool, generate(best_edu)
local position = 2
foreach newname in cert_nomat dip_nomat cert_mat dip_mat bachelors bach_dip honours postgrad othered {
	rename best_edu`position' `newname'
	local ++position
}

* Number of post-school years
recode postschool (16/17=1) (18/19=1) (20=3) (21=4) (22=4) (23=6) (24=0), generate(postschoolyears)

* Proxy for work experience: age minus 6, minus schooling, minus post-school years
generate experience = best_age_yrs - 6 - schooling - postschoolyears

* Trade union membership
generate tradeunion = (em1tru == 1)
label variable tradeunion "Trade union membership"
label variable tradeunion "Trade union membership"

*------------------------------------------------------------------------------------------------------------------------------------

* "OTHER" INCOME

* Receipt flag: 1 if inco==1, 0 otherwise, missing unless response is 1
generate othe_rec = (inco == 1)
replace othe_rec = . if response != 1

* Amount: codes from -9 to 0 are treated as missing
recode inco_v (-9/0 = .), generate(othe)
label variable othe "Income from other sources"
generate othe_d = (othe != .)
label variable othe_d "Respondent has non-missing other income data"
label values othe_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* MAIN WAGE VARIABLES

*Only imputing net wage

*Earning a wage for main or secondary job
* working: 1 if em1==1 or prox_emp==1, 0 for other respondents, missing otherwise
generate working = 1 if em1 == 1 | prox_emp == 1
replace working = 0 if working != 1 & response == 1

* working2: the same for the second job (em2)
generate working2 = 1 if em2 == 1
replace working2 = 0 if working2 != 1 & response == 1

* Main Job
* Net point estimates
* Net point estimates: codes -9 to 0 and amounts of 500000 or more become missing
recode em1pay (-9/0=.) (500000/max=.), gen(fwag_p)

*Net brackets
* Bracket answers (3 = less than, 2 = equal to, 1 = more than the threshold) are converted
* to an amount: the interval midpoint, the threshold itself, or twice the top threshold for
* the open-ended top bracket. Later lines overwrite earlier ones.
gen temp=.
replace temp=375 if em1inc_brac4==3						//<750
replace temp=750 if em1inc_brac4==2						//=750
replace temp=1125 if em1inc_brac4==1 & em1inc_brac2==3	//>750 & <1500
replace temp=1500 if em1inc_brac2==2					//=1500
replace temp=2250 if em1inc_brac2==1 & em1inc_brac1==3	//>1500 & <3000
replace temp=3000 if em1inc_brac1==2					//=3000
replace temp=4500 if em1inc_brac1==1 & em1inc_brac3==3	//>3000 & <6000
replace temp=6000 if em1inc_brac3==2					//=6000
replace temp=9000 if em1inc_brac3==1 & em1inc_brac5==3	//>6000 & <12000
replace temp=12000 if em1inc_brac5==2					//=12000
replace temp=18000 if em1inc_brac5==1 & em1inc_brac6==3	//>12000 & < 24000
replace temp=24000 if em1inc_brac6==2					//=24000
replace temp=48000 if em1inc_brac6==1					//>24000


gen fwag_ib=temp
drop temp

*Proxy income brackets (different thresholds: 750, 1500, 3000, 6000, 11000, 27000)
gen temp=.
replace temp=375 if prox_em1inc_s4==3						//<750
replace temp=750 if prox_em1inc_s4==2						//=750
replace temp=1125 if prox_em1inc_s4==1 & prox_em1inc_s2==3	//>750 & <1500
replace temp=1500 if prox_em1inc_s2==2						//=1500
replace temp=2250 if prox_em1inc_s2==1 & prox_em1inc_s1==3	//>1500 & <3000
replace temp=3000 if prox_em1inc_s1==2						//=3000
replace temp=4500 if prox_em1inc_s1==1 & prox_em1inc_s3==3	//>3000 & <6000
replace temp=6000 if prox_em1inc_s3==2						//=6000
replace temp=8500 if prox_em1inc_s3==1 & prox_em1inc_s5==3	//>6000 & <11000
replace temp=11000 if prox_em1inc_s5==2						//=11000
* Midpoint of 11000-27000 is 19000 (was 17000); the self-employed proxy brackets
* further down this file already use 19000 for the same interval.
replace temp=19000 if prox_em1inc_s5==1 & prox_em1inc_s6==3	//>11000 & <27000
replace temp=27000 if prox_em1inc_s6==2						//=27000
replace temp=54000 if prox_em1inc_s6==1						//>27000

gen fwag_pib=temp
drop temp

*Cleaning up proxy brackets
* Proxy bracket values are kept only where prox_emp==1
replace fwag_pib=. if prox_emp!=1
gen fwag_pib_d=fwag_pib!=.
label values fwag_pib_d dummy
label variable fwag_pib_d "Dummy to indicate if net wages were sourced from proxy income brackets"
* For proxy records the proxy bracket fills in where no own bracket value exists
replace fwag_ib=fwag_pib if proxy==1 & fwag_pib!=. & fwag_ib==.
gen fwag_ib_d=fwag_ib!=.
label values fwag_ib_d dummy
label variable fwag_ib_d "Dummy to indicate if net wages were sourced from income brackets"

*Second job
* Point estimate for the second job; codes -9 to 0 become missing
recode em2pay (-9/0 = .), generate(fwag_p2)
label variable fwag_p2 "Net pay from second job"

* Bracket answers converted to amounts (thresholds 250, 500, 1000, 2500, 5000, 15000);
* later lines overwrite earlier ones
generate temp = .
replace temp = 125   if em2inc_brac4 == 3                       // below 250
replace temp = 250   if em2inc_brac4 == 2                       // exactly 250
replace temp = 375   if em2inc_brac4 == 1 & em2inc_brac2 == 3   // 250 to 500
replace temp = 500   if em2inc_brac2 == 2                       // exactly 500
replace temp = 750   if em2inc_brac2 == 1 & em2inc_brac1 == 3   // 500 to 1000
replace temp = 1000  if em2inc_brac1 == 2                       // exactly 1000
replace temp = 1750  if em2inc_brac1 == 1 & em2inc_brac3 == 3   // 1000 to 2500
replace temp = 2500  if em2inc_brac3 == 2                       // exactly 2500
replace temp = 3750  if em2inc_brac3 == 1 & em2inc_brac5 == 3   // 2500 to 5000
replace temp = 5000  if em2inc_brac5 == 2                       // exactly 5000
replace temp = 10000 if em2inc_brac5 == 1 & em2inc_brac6 == 3   // 5000 to 15000
replace temp = 15000 if em2inc_brac6 == 2                       // exactly 15000
replace temp = 30000 if em2inc_brac6 == 1                       // above 15000

generate fwag_p_ib2 = temp
drop temp

* fwag2: the point estimate where available, otherwise the bracket amount
generate fwag2 = fwag_p2
replace fwag2 = fwag_p_ib2 if fwag2 == . & fwag_p_ib2 != .
label variable fwag2 "Net pay from second job"

*Net wages incorporating income bracket and proxy income bracket net wages as well as second job wages
* Main-job pay: point estimate first, bracket amount where the point estimate is missing
gen fwag=fwag_p
label variable fwag "Monthly take home pay from main job including brackets"
replace fwag=fwag_ib if fwag_ib!=. & fwag==.
* Add second-job pay; the total is missing only when both components are missing
egen fwag_temp=rowtotal(fwag fwag2), mi
replace fwag=fwag_temp
drop fwag_temp
replace fwag=. if fwag==0
label variable fwag "Take-home pay including brackets"

* Receipt flag: 1 if working in a main or second job, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen fwag_rec=working==1 | working2==1
replace fwag_rec=. if response!=1
gen fwag_d=fwag!=.
lab var fwag_d "Respondent has non-missing net wage data"
lab val fwag_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* NUMBER OF HOURS WORKED PER WEEK/MONTH

*Working hours data
* Weekly hours in the main and second job; codes -9 to 0 become missing
recode em1hrs (-9/0 = .), generate(weeklyhours)
recode em2hrs (-9/0 = .), generate(weeklyhours2)

* Total weekly hours across both jobs (missing only if both are missing);
* totals above 126 or equal to 0 are discarded
egen weeklyhours_temp = rowtotal(weeklyhours weeklyhours2), missing
replace weeklyhours = weeklyhours_temp
replace weeklyhours = . if weeklyhours > 126 | weeklyhours == 0

* Monthly hours = weekly hours times 22/5
generate monthlyhours = weeklyhours * (22/5)
quietly summarize monthlyhours, detail
return list

* Hold the median in a temporary scalar so that it does not depend on r() staying untouched
tempname median_hours
scalar `median_hours' = r(p50)

*Assuming that all of those without hours data work the median of hours.
generate imputemonthlyhours = 0 if working == 1
replace imputemonthlyhours = 1 if working == 1 & monthlyhours == .
replace monthlyhours = `median_hours' if working == 1 & monthlyhours == .

*-----------------------------------------------------------------------------------------------------------------------------------

* BONUS INCOME

*13th cheque
* Each component below follows the same pattern:
*   X_rec = 1 if the receipt question equals 1, 0 otherwise, missing unless response is 1
*   X     = reported amount, with codes -9 to 0 set to missing
*   X_d   = 1 if the amount is non-missing
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.

*13th cheque
gen cheq_rec=em1cheqlm==1
replace cheq_rec=. if response!=1
recode em1cheqlm_a (-9/0=.), gen(cheq)
lab var cheq "Monthly income from cheq_pa from main job"
gen cheq_d=cheq!=.
label variable cheq_d "Respondent has non-missing 13th cheque data"
label values cheq_d dummy

*Profit share
gen prof_rec=em1prflm==1
replace prof_rec=. if response!=1
recode em1prflm_a (-9/0=.), gen(prof)
label variable prof "Monthly income from prof_pa from main job"
gen prof_d=prof!=.
label variable prof_d "Respondent has non-missing profit share data"
label values prof_d dummy

*Bonus
gen bonu_rec=em1bonlm==1
replace bonu_rec=. if response!=1
recode em1bonlm_a (-9/0=.), gen(bonu)
label variable bonu "Monthly income from other bonuses from main job"
gen bonu_d=bonu!=.
label variable bonu_d "Respondent has non-missing other bonus data"
label values bonu_d dummy

*Piece-rate income
gen extr_rec=em1pcrtlm==1
replace extr_rec=. if response!=1
recode em1pcrtlm_a (-9/0=.) , gen(extr)
label variable extr "Monthly income on a piece rate basis"
gen extr_d=extr!=.
label variable extr_d "Respondent has non-missing piece-rate data"
label values extr_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* CASUAL LABOUR VARIABLES

*Casual labour dummy
* Casual labour dummy: 1 if emc==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen cwag_rec=emc==1
replace cwag_rec=. if response!=1

*Casual work wages (codes -9 to 0 become missing)
recode emcinc (-9/0=.), gen(cwag_p)
label variable cwag_p "Income from casual job"

*Income from casual work in brackets
* Thresholds 250, 500, 1000, 1500, 3000, 5500; later lines overwrite earlier ones
gen temp=.
replace temp=125 if emcinc_brac4==3
replace temp=250 if emcinc_brac4==2
replace temp=375 if emcinc_brac4==1 & emcinc_brac2==3
replace temp=500 if emcinc_brac2==2
replace temp=750 if emcinc_brac2==1 & emcinc_brac1==3
replace temp=1000 if emcinc_brac1==2
replace temp=1250 if emcinc_brac1==1 & emcinc_brac3==3
replace temp=1500 if emcinc_brac3==2
replace temp=2250 if emcinc_brac3==1 & emcinc_brac5==3
replace temp=3000 if emcinc_brac5==2
replace temp=4250 if emcinc_brac5==1 & emcinc_brac6==3
replace temp=5500 if emcinc_brac6==2
replace temp=11000 if emcinc_brac6==1 

gen cwag_p_ib=temp
drop temp

*Income from casual work, point estimates and brackets
gen cwag=cwag_p
label variable cwag "Monthly net pay from casual work including brackets"
replace cwag=cwag_p_ib if cwag==. & cwag_p_ib!=.
gen cwag_d=cwag!=.
label variable cwag_d "Respondent has non-missing casual pay data including brackets"
label values cwag_d dummy

*Hourly casual wages variables
recode emchrs (-9/0=.), gen(cmonthlyhours)
replace cmonthlyhours=. if cmonthlyhours>550 & cmonthlyhours!=.
quietly sum cmonthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours 
* cimputemonthlyhours: 1 = hours imputed with the median, 0 = hours reported.
* The second line previously assigned 1 as well, which marked every casual worker as imputed.
gen cimputemonthlyhours=1 if cmonthlyhours==. & cwag_rec==1
replace cimputemonthlyhours=0 if cimputemonthlyhours!=1 & cwag_rec==1
replace cmonthlyhours=r(p50) if cmonthlyhours==. & cwag_rec==1

*-----------------------------------------------------------------------------------------------------------------------------------

* SELF-EMPLOYMENT

*Self-employed
* sworking: 1 if ems==1, 0 otherwise, missing unless response is 1, and then forced
* to 1 wherever prox_emp==2.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen sworking=ems==1
replace sworking=. if response!=1
replace sworking=0 if response==1 & sworking==.
replace sworking=1 if prox_emp==2

*Earnings from self-employment
* emsincifr is used first and emsincfr_a fills in where it is missing;
* codes -9 to 0 become missing in both
recode emsincifr (-9/0=.), gen(swag_p)
recode emsincfr_a (-9/0=.), gen(temp)
replace swag_p=temp if swag_p==. & temp!=.
drop temp
label variable swag_p "Earnings from self-employment"
gen swag_p_d=swag_p!=.
label variable swag_p_d "Respondent has non-missing self-employment data"
label values swag_p_d dummy

*Earnings from self-employment in brackets (part 1)
* Part 1: emsincifr brackets (thresholds 250, 500, 1000, 2500, 5000, 15000).
* Later lines overwrite earlier ones.
generate temp = .
replace temp = 125   if emsincifr_brac4 == 3
replace temp = 250   if emsincifr_brac4 == 2
replace temp = 375   if emsincifr_brac4 == 1 & emsincifr_brac2 == 3
replace temp = 500   if emsincifr_brac2 == 2
replace temp = 750   if emsincifr_brac2 == 1 & emsincifr_brac1 == 3
replace temp = 1000  if emsincifr_brac1 == 2
replace temp = 1750  if emsincifr_brac1 == 1 & emsincifr_brac3 == 3
replace temp = 2500  if emsincifr_brac3 == 2
replace temp = 3750  if emsincifr_brac3 == 1 & emsincifr_brac5 == 3
replace temp = 5000  if emsincifr_brac5 == 2
replace temp = 10000 if emsincifr_brac5 == 1 & emsincifr_brac6 == 3
replace temp = 15000 if emsincifr_brac6 == 2
replace temp = 30000 if emsincifr_brac6 == 1

generate swag_p_ib = temp
drop temp

* Part 2: emsincfr brackets (thresholds 500, 1500, 3000, 6000, 12000, 20000)
generate temp = .
replace temp = 250   if emsincfr_brac4 == 3
replace temp = 500   if emsincfr_brac4 == 2
replace temp = 1000  if emsincfr_brac4 == 1 & emsincfr_brac2 == 3
replace temp = 1500  if emsincfr_brac2 == 2
replace temp = 2250  if emsincfr_brac2 == 1 & emsincfr_brac1 == 3
replace temp = 3000  if emsincfr_brac1 == 2
replace temp = 4500  if emsincfr_brac1 == 1 & emsincfr_brac3 == 3
replace temp = 6000  if emsincfr_brac3 == 2
replace temp = 9000  if emsincfr_brac3 == 1 & emsincfr_brac5 == 3
replace temp = 12000 if emsincfr_brac5 == 2
replace temp = 16000 if emsincfr_brac5 == 1 & emsincfr_brac6 == 3
replace temp = 20000 if emsincfr_brac6 == 2
replace temp = 40000 if emsincfr_brac6 == 1

* NOTE(review): this tests swag_p (the point estimate), not swag_p_ib, so a part-2
* bracket overwrites a part-1 bracket whenever the point estimate is missing - confirm intended.
replace swag_p_ib = temp if temp != . & swag_p == .
drop temp

/*For self-employed proxies*/
* Proxy brackets (thresholds 750, 1500, 3000, 6000, 11000, 27000)
generate temp = .
replace temp = 375   if prox_em1inc_s4 == 3
replace temp = 750   if prox_em1inc_s4 == 2
replace temp = 1125  if prox_em1inc_s4 == 1 & prox_em1inc_s2 == 3
replace temp = 1500  if prox_em1inc_s2 == 2
replace temp = 2250  if prox_em1inc_s2 == 1 & prox_em1inc_s1 == 3
replace temp = 3000  if prox_em1inc_s1 == 2
replace temp = 4500  if prox_em1inc_s1 == 1 & prox_em1inc_s3 == 3
replace temp = 6000  if prox_em1inc_s3 == 2
replace temp = 8500  if prox_em1inc_s3 == 1 & prox_em1inc_s5 == 3
replace temp = 11000 if prox_em1inc_s5 == 2
replace temp = 19000 if prox_em1inc_s5 == 1 & prox_em1inc_s6 == 3
replace temp = 27000 if prox_em1inc_s6 == 2
replace temp = 54000 if prox_em1inc_s6 == 1

generate swag_p_pib = temp
drop temp

* Proxy brackets count only where prox_emp==2, and only fill gaps in swag_p_ib
replace swag_p_pib = . if prox_emp != 2
replace swag_p_ib = swag_p_pib if swag_p_ib == . & swag_p_pib != .

*Income from self-employment, point estimates and brackets
* swag: the point estimate where available, otherwise the bracket amount;
* zero amounts and anyone with sworking other than 1 are set to missing
generate swag = swag_p
label variable swag "Monthly income from self-employment including brackets"
replace swag = swag_p_ib if swag_p_ib != . & swag == .
replace swag = . if swag == 0
replace swag = . if sworking != 1

* The receipt flag is the self-employment indicator itself
generate swag_rec = sworking
generate swag_d = (swag != .)
label variable swag_d "Respondent has non-missing self employment earnings data including brackets"
label values swag_d dummy

*Hourly self-employment wages
* Weekly hours (codes -9 to 0 become missing), converted to monthly hours with 22/5;
* monthly totals above 550 are discarded
recode emshrs (-9/0=.), gen(sweeklyhours)
gen smonthlyhours=sweeklyhours*(22/5)
replace smonthlyhours=. if smonthlyhours>550 & smonthlyhours!=.
quietly sum smonthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours 
* simputemonthlyhours: 1 = hours imputed with the median, 0 = hours reported.
* The second line previously assigned 1 as well, which marked every self-employed person as imputed.
gen simputemonthlyhours=1 if smonthlyhours==. & sworking==1
replace simputemonthlyhours=0 if simputemonthlyhours!=1 & sworking==1
replace smonthlyhours=r(p50) if smonthlyhours==. & sworking==1

*-----------------------------------------------------------------------------------------------------------------------------------

* INCOME FROM HELPING IN A FRIEND'S BUSINESS

*Helping a friend
* help_rec: 1 if emhearn==1, 0 for other respondents, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen help_rec=1 if emhearn==1
replace help_rec=0 if help_rec==. & response==1
replace help_rec=. if response!=1

*Income from helping friend (codes -9 to 0 become missing)
recode emhearn_v (-9/0=.), gen(help)
label variable help "Monthly income from helping friends with their business"
replace help=. if help==0

gen help_d=help!=.
label variable help_d "Respondent has non-missing help-friend data"
label values help_d dummy

*Hours of work helping friends with business
* Codes -9 to -3 and values of 140 or more become missing; monthly = weekly * 22/5
recode emhhrs (-9/-3=.) (140/max=.), gen(hf_weeklyhours)
gen hf_monthlyhours=hf_weeklyhours*(22/5)
quietly sum hf_monthlyhours, detail
return list
*Assuming that all of those without hours data work the median of hours 
* imp_hf_monthlyhours: 1 = hours imputed with the median, 0 = hours reported.
* The second line previously assigned 1 as well, which marked every helper as imputed.
gen imp_hf_monthlyhours=1 if hf_monthlyhours==. & help_rec==1
replace imp_hf_monthlyhours=0 if imp_hf_monthlyhours!=1 & help_rec==1
replace hf_monthlyhours=r(p50) if hf_monthlyhours==. & help_rec==1

*Hourly earnings from helping friend
gen hr_help=help/hf_monthlyhours
gen lnhr_help=ln(hr_help)

*-----------------------------------------------------------------------------------------------------------------------------------

* STATE OLD AGE PENSION

/*Amount of the SOAP:
	- Before 1 April 2012: 1140
	- After  1 April 2012: 1200 (If you are older than 75 yrs, you will get 1200 + 20)
	- 2014:  R1350 per month or R1370 per month if older than 75
	- 2015:  R1410pm and R1430 if older than 75
All state pensions above a threshold of R1430 per month are reallocated to private pensions.*/

*Receive State Old Age Pension
* spen_rec: 1 if incgovpen==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen spen_rec=incgovpen==1
replace spen_rec=. if response!=1

*Income from SOAP (codes -9 to 0 become missing)
recode incgovpen_v (-9/0=.), gen(spen)
label variable spen "Monthly income from state old age pension"

* These specific amounts are divided by ten (12000 becomes 1200, and so on)
replace spen=1200 if spen==12000
replace spen=1220 if spen==12200
replace spen=1350 if spen==13500
replace spen=1370 if spen==13700
replace spen=1400 if spen==14000
replace spen=1410 if spen==14100

*Reallocating high amounts to private pension 
* Amounts above 1430 are moved into temp_ppen, which the private pension section picks up
replace spen_rec=0 if spen>1430 & spen!=.
gen temp_ppen=spen if spen>1430 & spen!=.
replace spen=. if temp_ppen!=.

gen spen_d=spen!=.
label variable spen_d "Respondent has non-missing state pension data"
label values spen_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* PRIVATE OR FOREIGN PENSIONS

*Receive a pension or provident fund
* ppen_rec: 1 if incpfnd==1 or incret==1 or a state pension amount was reallocated
* (temp_ppen non-missing); 0 for other respondents
generate ppen_rec = 1 if incpfnd == 1 | incret == 1 | temp_ppen != .
replace ppen_rec = 0 if response == 1 & ppen_rec != 1

* Amount: sum of both reported amounts and the reallocated state pension amount;
* missing only if all three are missing
recode incpfnd_v (-9/0 = .), generate(temp1)
recode incret_v (-9/0 = .), generate(temp2)
egen ppen = rowtotal(temp1 temp2 temp_ppen), missing
label variable ppen "Monthly income from pension/provident fund and retirement annuities"
drop temp*

generate ppen_d = (ppen != .)
label variable ppen_d "Respondent has non-missing private retirement funding income data"
label values ppen_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* UNEMPLOYMENT INSURANCE FUND (UIF)

*Receive UIF
* uif_rec: 1 if incuif==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen uif_rec=incuif==1
replace uif_rec=. if response!=1

*Income from UIF (codes -9 to 0 and amounts above 20000 become missing)
recode incuif_v (-9/0=.), gen(uif)
label variable uif "Monthly income from UIF payments"
replace uif=. if uif>20000 

gen uif_d=uif!=.
label variable uif_d "Respondent has non-missing uif income data"
label values uif_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* WORKMEN'S COMPENSATION

*Receive workmen's compensation
* comp_rec: 1 if incwc==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen comp_rec=incwc==1
replace comp_rec=. if response!=1

*Income from workmen's compensation (codes -9 to 0 become missing)
recode incwc_v (-9/0=.), gen(comp)
label variable comp "Monthly income from workmen's compensation payments"

gen comp_d=comp!=.
label variable comp_d "Respondent has non-missing workmen's compensation income data"
label values comp_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* DISABILITY GRANT

*Receive disability grant
* dis_rec: 1 if incdis==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen dis_rec=incdis==1
replace dis_rec=. if response!=1

*Income from disability grant (codes -9 to 0 become missing)
recode incdis_v (-9/0=.), gen(dis)
label variable dis "Monthly income from disability grant payments"

gen dis_d=dis!=.
label variable dis_d "Respondent has non-missing disability grant data"
label values dis_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* CHILD SUPPORT GRANT

*Receive child support grant
* chld_rec: 1 if incchld==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen chld_rec=incchld==1
replace chld_rec=. if response!=1

*Income from child support grant (codes -9 to 0 become missing; kept for recipients only)
recode incchld_v (-9/0=.), gen(chld)
label variable chld "Monthly income from child support grant"
replace chld=. if chld_rec!=1

gen chld_d=chld!=.
label variable chld_d "Respondent has non-missing child support grant data"
label values chld_d dummy

/*Number of children living with adult females.
This will not be the same as biochildren, which is built at the top of this do file from
birth-history children aged 0 to 18.  It uses different data (bhlive_n) and includes
children of any age.*/
recode bhlive_n (-3/0=.), gen(biochild)
replace biochild=0 if  bhbrth==2 | bhlive==2
* biochild_nodata marks grant recipients whose child count was missing and is zero-filled below
gen biochild_nodata=1 if biochild==. & chld_rec==1
replace biochild_nodata=0 if biochild!=. & chld_rec==1
replace biochild=0 if biochild==. & chld_rec==1
* Cap the count at hhchildren
replace biochild=hhchildren if biochild>hhchildren & hhchildren!=. & biochild!=.

*Household level biochild variable
egen hh_biochild=sum(biochild), by(w4_hhid)

*-----------------------------------------------------------------------------------------------------------------------------------

* FOSTER CARE GRANT

*Receive foster care grant
* fost_rec: 1 if incfos==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen fost_rec=incfos==1
replace fost_rec=. if response!=1

*Income from foster care grant (codes -9 to 0 become missing)
recode incfos_v (-9/0=.), gen(fost)
label variable fost "Monthly income from foster care grant"

gen fost_d=fost!=.
label variable fost_d "Respondent has non-missing foster care grant data"
label values fost_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* CARE DEPENDENCY GRANT

*Receive care dependency grant
* cdep_rec: 1 if inccare==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen cdep_rec=inccare==1
replace cdep_rec=. if response!=1

*Income from care dependency grant (codes -9 to 0 become missing)
recode inccare_v (-9/0=.), gen(cdep)
label variable cdep "Monthly income from care dependency grant"

gen cdep_d=cdep!=.
label variable cdep_d "Respondent has non-missing care dependency grant data"
label values cdep_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* INTEREST / DIVIDEND INCOME

*Receive interest/dividend income
* indi_rec: 1 if incint==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen indi_rec=incint==1
replace indi_rec=. if response!=1

*Income from interest/dividends (codes -9 to 0 become missing)
recode incint_v (-9/0=.), gen(indi)
label variable indi "Monthly income from interest/dividends"

gen indi_d=indi!=.
label variable indi_d "Respondent has non-missing interest/dividend income data"
label values indi_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* INHERITANCE

*Receive inheritance income
* inhe_rec: 1 if incinh==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen inhe_rec=incinh==1
replace inhe_rec=. if response!=1

*Income from inheritance (codes -9 to 0 become missing)
recode incinh_v (-9/0=.), gen(inhe)
label variable inhe "Monthly income from inheritances"

gen inhe_d=inhe!=.
label variable inhe_d "Respondent has non-missing inheritance income data"
label values inhe_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* WAR VETERANS PENSION

* war_rec: 1 if incwar==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen war_rec=incwar==1
replace war_rec=. if response!=1

* The war veterans pension amount is added into "other" income (othe); the other-income
* receipt and data flags are switched on wherever the combined amount is non-missing
recode incwar_v (-9/0=.), gen(war_income)
egen temp=rowtotal(othe war_income), m
replace othe=temp
drop temp
replace othe_rec=1 if othe!=. & othe_rec==0
replace othe_d=1 if othe!=.

*-----------------------------------------------------------------------------------------------------------------------------------

* RENTAL INCOME

*Receive rental income
* rnt_rec: 1 if incrnt==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen rnt_rec=incrnt==1
replace rnt_rec=. if response!=1

*Rental income (codes -9 to 0 become missing)
recode incrnt_v (-9/0=.), gen(rnt)
label variable rnt "Monthly income from rentals"

gen rnt_d=rnt!=.
label variable rnt_d "Respondent has non-missing rental income data"
label values rnt_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* RETRENCHMENT / RETIREMENT PACKAGE

*Receive retrenchment/retirement package
* retr_rec: 1 if incretr==1 or incretp==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen retr_rec=incretr==1 | incretp==1
replace retr_rec=. if response!=1

*Income from retrenchment package
* Sum of both amounts (codes -9 to 0 treated as missing); missing only if both are missing
recode incretr_v (-9/0=.), gen(temp1)
recode incretp_v (-9/0=.), gen(temp2)
egen retr=rowtotal(temp1 temp2), m
label variable retr "Monthly income from retrenchment package"
drop temp*

gen retr_d=retr!=.
label variable retr_d "Respondent has non-missing retrenchment payment data"
label values retr_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* LOBOLA / BRIDE WEALTH PAYMENTS

*Receive lobola/bride wealth payment
* brid_rec: 1 if inclob==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen brid_rec=inclob==1
replace brid_rec=. if response!=1

*Income lobola/bride wealth payment (codes -9 to 0 become missing)
recode inclob_v (-9/0=.), gen(brid)
label variable brid "Monthly income from lobola/bride wealth payments"

gen brid_d=brid!=.
label variable brid_d "Respondent has non-missing lobola/bride wealth payment data"
label values brid_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* GIFT INCOME

*Receive gift income
* gift_rec: 1 if incgif==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen gift_rec=incgif==1
replace gift_rec=. if response!=1

*Income from gifts (codes -9 to 0 become missing)
recode incgif_v (-9/0=.), gen(gift)
label variable gift "Monthly income from gifts"

gen gift_d=gift!=.
label variable gift_d "Respondent has non-missing gift income data"
label values gift_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* REPAYMENT OF LOANS TO YOU

*Receive repayment of loans income
* loan_rec: 1 if incloan==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen loan_rec=incloan==1
replace loan_rec=. if response!=1

*Income from repayments of loans to you (codes -9 to 0 become missing)
recode incloan_v (-9/0=.), gen(loan)
label variable loan "Monthly 'income' from loan repayments"

gen loan_d=loan!=.
label variable loan_d "Respondent has non-missing repayment of loans income data"
label values loan_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* SALE OF HOUSEHOLD GOODS

*Receive income from sale of household goods
* sale_rec: 1 if incsale==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen sale_rec=incsale==1
replace sale_rec=. if response!=1

*Income from sale of household goods (codes -9 to 0 become missing)
recode incsale_v (-9/0=.), gen(sale)
label variable sale "Monthly 'income' from sale of household goods"

gen sale_d=sale!=.
label variable sale_d "Respondent has non-missing sale of household goods income data"
label values sale_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* REMITTANCES 

*Receive remittance income
* remt_rec: 1 if cr==1, 0 otherwise, missing unless response is 1.
* The test is response!=1 because non-respondents can carry a missing response,
* which "response==0" does not match.
gen remt_rec=cr==1
replace remt_rec=. if response!=1

*Income from remittances

*Money remittances: crmnv1-crmnv10 become temp1-temp10 (codes -9 to 0 set to missing).
* capture skips any slot whose variable does not exist.
forvalues x = 1/10 {
	capture recode crmnv`x' (-9/0=.), gen(temp`x')
}

*Values of in-kind remittances: crkndmnv1-crkndmnv10 become temp11-temp20.
* A plain forvalues loop replaces the earlier dataset variable used as a loop counter.
forvalues x = 1/10 {
	local y = `x' + 10
	capture recode crkndmnv`x' (-9/0=.), gen(temp`y')
}

*Aggregating remittances
* Sums every temp* variable; the total is missing only if all components are missing
egen remt=rowtotal(temp*), m
label variable remt "Monthly income from all remittances"
drop temp*

gen remt_d=remt!=.
label variable remt_d "Respondent has non-missing remittances received data"
label values remt_d dummy

*-----------------------------------------------------------------------------------------------------------------------------------

* HOUSEHOLD VARIABLES FOR HOUSEHOLD LEVEL INCOME IMPUTATIONS

*Household one-shot income
sort w4_hhid

* Flag is 1 where w4_hh_outcome is 1 and missing everywhere else
generate hhq_incb_rec = 1 if w4_hh_outcome == 1

* A reported one-shot household income of zero is treated as missing
replace hhq_incb = . if hhq_incb == 0

generate hhq_incb_d = (hhq_incb != .)
label values hhq_incb_d dummy 


*Household mode race
* Most common race code in the household: the smallest mode is used, with the largest
* mode as fallback; households with no race data are flagged and then set to 0
egen hhrace = mode(race), by(w4_hhid) minmode
egen hhracetemp = mode(race), by(w4_hhid) maxmode
replace hhrace = hhracetemp if hhrace == .
drop hhracetemp
generate hhrace_d = (hhrace != .)
replace hhrace = 0 if hhrace == .

*Maximum household education======
* Highest schooling in the household, replaced by the recoded highest post-school code
* wherever that is neither zero nor missing
egen hhedu = max(schooling), by(w4_hhid)
egen hhpostedu = max(postschool), by(w4_hhid)
recode hhpostedu (16/17=12) (18/19=13) (20/21=15) (22=16) (23=17) (24=.)
replace hhedu = hhpostedu if hhpostedu != . & hhpostedu != 0
generate hhedusq = hhedu^2

*Median household age
* Household median age over members with an age from 0 up to (not including) 110.
* Rows outside that range get no value from egen, so after sorting (missing sorts last
* within the household) they are filled from the row above.
egen hhage = median(best_age_yrs) if best_age_yrs < 110 & best_age_yrs >= 0, by(w4_hhid)
sort w4_hhid hhage
replace hhage = hhage[_n-1] if w4_hhid == w4_hhid[_n-1] & hhage == .

* Flag households with a median age, then zero-fill the rest
generate hhage_d = (hhage != .)
replace hhage = 0 if hhage == .

*Dummy variable for a trade union member present in household
egen hhtu = max(tradeunion), by(w4_hhid)

* Write the prepared dataset to disk
save "$DataOUT\prepdata.dta", replace

* end of do file 

*=====================================================================================================================================
